{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import datetime\n",
    "# Wall-clock timestamp (seconds since the epoch) taken when the notebook starts running.\n",
    "start = time.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import loompy\n",
    "import scanpy as sc\n",
    "import pandas as pd\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scanpy session setup: logging verbosity, version header, default figure resolution.\n",
    "# verbosity: errors (0), warnings (1), info (2), hints (3)\n",
    "sc.settings.verbosity = 3  \n",
    "sc.logging.print_header() \n",
    "sc.settings.set_figure_params(dpi=80)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the four loom files (one per genotype/region sample) as AnnData objects.\n",
    "loom_paths = [\n",
    "    '../looms-CR5-r-102/WTA.loom',\n",
    "    '../looms-CR5-r-102/WTI.loom',\n",
    "    '../looms-CR5-r-102/cKOA.loom',\n",
    "    '../looms-CR5-r-102/cKOI.loom',\n",
    "]\n",
    "WTAmp, WTIsth, cKOAmp, cKOIsth = [sc.read_loom(path) for path in loom_paths]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# De-duplicate gene names in every sample before the samples are concatenated.\n",
    "for sample in (WTAmp, WTIsth, cKOAmp, cKOIsth):\n",
    "    sample.var_names_make_unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Merge ampulla + isthmus within each genotype; the sample of origin is stored in obs['Region'].\n",
    "region_batch = dict(batch_key='Region', batch_categories=['Ampulla', 'Isthmus'])\n",
    "WTAmpIsth = WTAmp.concatenate(WTIsth, **region_batch)\n",
    "cKOAmpIsth = cKOAmp.concatenate(cKOIsth, **region_batch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Merge the WT and cKO objects; the genotype of origin is stored in obs['Genotype'] ('WT' / 'cKO').\n",
    "WTcKOAmpIsth = WTAmpIsth.concatenate(cKOAmpIsth, batch_key='Genotype', batch_categories=['WT','cKO'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the summary of the merged AnnData object.\n",
    "WTcKOAmpIsth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the merged object; it is read back from this path further down the notebook.\n",
    "WTcKOAmpIsth.write('./WTcKOAmpIsth.h5ad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the WT ampulla sample on its own.\n",
    "WTAmp.write('./WTInfAmp.h5ad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the WT isthmus sample on its own.\n",
    "WTIsth.write('./WTIsthUTJ.h5ad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the cKO ampulla sample on its own.\n",
    "cKOAmp.write('./cKOInfAmp.h5ad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the cKO isthmus sample on its own.\n",
    "cKOIsth.write('./cKOIsthUTJ.h5ad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scanpy as sc\n",
    "import matplotlib.pyplot as plt\n",
    "import glob\n",
    "import seaborn as sns\n",
    "import bbknn\n",
    "# RangeIndex is part of the public top-level pandas API; the private\n",
    "# pandas.core.index module is deprecated and missing from newer pandas releases.\n",
    "from pandas import RangeIndex\n",
    "from matplotlib import rcParams\n",
    "from matplotlib.pyplot import rc_context"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scanpy session setup for the analysis part: verbosity, version header, white-background figures.\n",
    "# verbosity: errors (0), warnings (1), info (2), hints (3)\n",
    "sc.settings.verbosity = 3   \n",
    "sc.logging.print_header() \n",
    "sc.settings.set_figure_params(dpi=80, facecolor='white') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the working directory; the relative './' paths used below resolve against it.\n",
    "!pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the merged object that was written to disk above.\n",
    "WTcKOAmpIsth = sc.read( './WTcKOAmpIsth.h5ad') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ensure gene names are unique (presumably already the case, since each sample was\n",
    "# de-duplicated before concatenation -- harmless if so).\n",
    "WTcKOAmpIsth.var_names_make_unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the summary of the reloaded object.\n",
    "WTcKOAmpIsth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the 20 genes that account for the largest share of counts per cell.\n",
    "sc.pl.highest_expr_genes(WTcKOAmpIsth, n_top=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Basic filtering: drop cells with fewer than 200 detected genes, then genes detected\n",
    "# in fewer than 3 cells. filter_cells also records obs['n_genes'], which the QC plots\n",
    "# and thresholds below rely on.\n",
    "sc.pp.filter_cells(WTcKOAmpIsth, min_genes=200) \n",
    "sc.pp.filter_genes(WTcKOAmpIsth, min_cells=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-cell QC metrics.\n",
    "# 'pct_counts_mt' is stored as a fraction (0-1), not a percentage: mito counts / total counts.\n",
    "# NOTE(review): the prefix test is 'mt', not 'mt-'; confirm no non-mitochondrial gene\n",
    "# names in this dataset start with lowercase 'mt'.\n",
    "mito_genes = WTcKOAmpIsth.var_names.str.startswith('mt')\n",
    "counts_per_cell = WTcKOAmpIsth.X.sum(axis=1).A1  # .A1 assumes X is a scipy sparse matrix\n",
    "mito_counts_per_cell = WTcKOAmpIsth[:, mito_genes].X.sum(axis=1).A1\n",
    "WTcKOAmpIsth.obs['pct_counts_mt'] = mito_counts_per_cell / counts_per_cell\n",
    "WTcKOAmpIsth.obs['total_counts'] = counts_per_cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the summary; obs now carries 'pct_counts_mt' and 'total_counts'.\n",
    "WTcKOAmpIsth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One violin plot per QC metric.\n",
    "for qc_metric in ('n_genes', 'total_counts', 'pct_counts_mt'):\n",
    "    sc.pl.violin(WTcKOAmpIsth, [qc_metric], jitter=0.4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-display the AnnData summary.\n",
    "WTcKOAmpIsth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# QC scatter plots: total counts against mitochondrial fraction, then against detected genes.\n",
    "for y_key in ('pct_counts_mt', 'n_genes'):\n",
    "    sc.pl.scatter(WTcKOAmpIsth, x='total_counts', y=y_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the AnnData summary before the QC thresholds are applied.\n",
    "WTcKOAmpIsth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# QC filter: keep cells with fewer than 4000 detected genes and a mitochondrial\n",
    "# fraction below 0.1. Both conditions are applied in a single indexing step and the\n",
    "# result is copied, so the in-place preprocessing that follows operates on a real\n",
    "# AnnData object instead of a view of a view.\n",
    "qc_pass = (WTcKOAmpIsth.obs['n_genes'] < 4000) & (WTcKOAmpIsth.obs['pct_counts_mt'] < 0.1)\n",
    "WTcKOAmpIsth = WTcKOAmpIsth[qc_pass, :].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Library-size normalisation. With target_sum=None each cell is scaled to the median\n",
    "# total count across cells (scanpy's documented default).\n",
    "sc.pp.normalize_total(WTcKOAmpIsth, target_sum=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Log-transform the normalised counts: X = log(X + 1).\n",
    "sc.pp.log1p(WTcKOAmpIsth)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Regress total counts and mitochondrial fraction out of every gene's expression.\n",
    "# NOTE(review): this runs BEFORE highly_variable_genes and before `.raw` is assigned,\n",
    "# so both of those see regressed residuals rather than log-normalised values. The usual\n",
    "# scanpy order is HVG selection -> raw -> regress_out -> scale; confirm this order is intended.\n",
    "sc.pp.regress_out(WTcKOAmpIsth, ['total_counts', 'pct_counts_mt'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flag highly variable genes (stored in var['highly_variable']) using mean/dispersion cut-offs.\n",
    "sc.pp.highly_variable_genes(WTcKOAmpIsth, min_mean=0.0125, max_mean=3, min_disp=0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualise the highly-variable-gene selection.\n",
    "sc.pl.highly_variable_genes(WTcKOAmpIsth)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the current all-gene matrix in `.raw` before subsetting to highly variable genes.\n",
    "# At this point X has been normalised, log-transformed and regressed.\n",
    "WTcKOAmpIsth.raw = WTcKOAmpIsth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restrict X to the highly variable genes (the full gene set stays available via `.raw`).\n",
    "WTcKOAmpIsth = WTcKOAmpIsth[:, WTcKOAmpIsth.var.highly_variable]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scale each gene to unit variance and zero mean, clipping values above 10.\n",
    "sc.pp.scale(WTcKOAmpIsth, max_value=10) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Principal component analysis with the ARPACK solver.\n",
    "sc.tl.pca(WTcKOAmpIsth, svd_solver='arpack')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# PCA scatter coloured by genotype.\n",
    "sc.pl.pca(WTcKOAmpIsth, color='Genotype')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variance explained per principal component (log scale), used to choose how many PCs to keep.\n",
    "sc.pl.pca_variance_ratio(WTcKOAmpIsth, log=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Colour the PCA embedding by sample annotations and by a panel of marker genes.\n",
    "pca_color_keys = [\n",
    "    'Region', 'Genotype',\n",
    "    'Epcam', 'Ovgp1', 'Foxj1', 'Myh11', 'Cd52', 'Pecam1',\n",
    "    'Pdgfra', 'Twist2', 'Igf1', 'Dcn', 'Serpina1e', 'Myb',\n",
    "    'Pax8', 'Pax2', 'Cxcl17', 'Id2', 'Pdxk', 'Ccdc153',\n",
    "]\n",
    "sc.pl.pca(WTcKOAmpIsth, color=pca_color_keys, wspace=0.35)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same variance-ratio plot restricted to the first 37 PCs (the number passed to neighbors below).\n",
    "sc.pl.pca_variance_ratio(WTcKOAmpIsth, n_pcs=37, log=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the AnnData summary after PCA.\n",
    "WTcKOAmpIsth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "\n",
    "# Build the k-nearest-neighbour graph (k=10) on the first 37 principal components.\n",
    "# n_pcs=37 was presumably chosen from the variance-ratio plot above -- confirm.\n",
    "sc.pp.neighbors(WTcKOAmpIsth, n_neighbors=10, n_pcs=37)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Initial Leiden clustering (default resolution), PAGA on those clusters, and a UMAP\n",
    "# initialised from the PAGA layout.\n",
    "# NOTE(review): the following cells call sc.tl.umap again without init_pos and rerun\n",
    "# leiden at resolution 0.3, which replaces both results computed here.\n",
    "sc.tl.leiden(WTcKOAmpIsth)\n",
    "sc.tl.paga(WTcKOAmpIsth)\n",
    "sc.pl.paga(WTcKOAmpIsth, plot=False)  # remove `plot=False` if you want to see the coarse-grained graph\n",
    "sc.tl.umap(WTcKOAmpIsth, init_pos='paga')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Recompute the UMAP with the default initialisation; this overwrites the\n",
    "# PAGA-initialised embedding from the previous cell.\n",
    "sc.tl.umap(WTcKOAmpIsth)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Final clustering: Leiden at resolution 0.3 (overwrites obs['leiden'] from the earlier run).\n",
    "sc.tl.leiden(WTcKOAmpIsth, resolution=0.3) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# UMAP coloured by cluster, genotype and region, with category labels drawn on the plot.\n",
    "sc.pl.umap(WTcKOAmpIsth,color=['leiden','Genotype', 'Region'],legend_loc='on data')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# UMAP panels for the annotations and the marker genes used to identify clusters.\n",
    "umap_color_keys = [\n",
    "    'leiden', 'Region', 'Genotype',\n",
    "    'Epcam', 'Ovgp1', 'Foxj1', 'Myh11', 'Cd52', 'Ptprc', 'Pecam1',\n",
    "    'Pdgfra', 'Twist2', 'Igf1', 'Dcn', 'Serpina1e', 'Myb',\n",
    "    'Pax8', 'Pax2', 'Cxcl17', 'Id2', 'Pdxk', 'Ccdc153', 'Il22',\n",
    "]\n",
    "sc.pl.umap(WTcKOAmpIsth, color=umap_color_keys)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of cells per (cluster, genotype, region) combination.\n",
    "WTcKOAmpIsth.obs.groupby([\"leiden\",\"Genotype\",\"Region\"]).apply(len)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Marker panel reused by the UMAP and dot plots further down.\n",
    "marker_genes = [\n",
    "    'Epcam', 'Krt8', 'Krt19', 'Vim', 'Pdgfra', 'Twist2', 'Sprr2f',\n",
    "    'Acta2', 'Myh11', 'Foxj1', 'Ccdc153', 'Ovgp1', 'Wt1', 'Serpina1e',\n",
    "    'S100g', 'Pecam1', 'Ptprc', 'Pgr', 'Esr1', 'Crabp2', 'Cd44', 'Pdxk',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One UMAP panel per gene in marker_genes.\n",
    "sc.pl.umap(WTcKOAmpIsth, color=marker_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of cells per (cluster, genotype, region) combination (same table as shown earlier).\n",
    "WTcKOAmpIsth.obs.groupby([\"leiden\",\"Genotype\",\"Region\"]).apply(len)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach cell-type labels to the Leiden clusters. Names are '<cluster index> <label>',\n",
    "# listed in category order; the list assumes the clustering produced exactly 14 clusters.\n",
    "cluster_labels = [\n",
    "    'Secretory_IsthUTJ',\n",
    "    'Secretory_InfAmpIsthUTJ',\n",
    "    'Ciliated_InfAmp',\n",
    "    'Secretory_InfAmp',\n",
    "    'Fibroblast_Pdgfra-',\n",
    "    'Ciliated_IsthUTJ',\n",
    "    'Fibroblast_Pdgfra+',\n",
    "    'Muscle',\n",
    "    'Immune1',\n",
    "    'Secretory_IsthUTJ_subset',\n",
    "    'Endothelial',\n",
    "    'Immune2',\n",
    "    'Unidentified1',\n",
    "    'Unidentified2',\n",
    "]\n",
    "new_cluster_names = [f'{index} {label}' for index, label in enumerate(cluster_labels)]\n",
    "WTcKOAmpIsth.rename_categories('leiden', new_cluster_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the zeileis_28 palette: one dot per colour, positioned by its palette index.\n",
    "plt.figure(figsize=(8, 2))\n",
    "for palette_idx, colour in enumerate(sc.pl.palettes.zeileis_28):\n",
    "    plt.scatter(palette_idx, 1, c=colour, s=200)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on array copies: the palette to pick from, and the current per-cluster colours\n",
    "# (uns['leiden_colors'] is presumably populated by the earlier plots coloured by 'leiden').\n",
    "zeileis_colors = np.array(sc.pl.palettes.zeileis_28)\n",
    "new_colors = np.array(WTcKOAmpIsth.uns['leiden_colors'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cluster index -> zeileis_28 palette index.\n",
    "cluster_to_palette = {\n",
    "    0: 7, 1: 0, 2: 5, 3: 6, 4: 16, 5: 4, 6: 17,\n",
    "    7: 24, 8: 12, 9: 8, 10: 26, 11: 13, 12: 20, 13: 19,\n",
    "}\n",
    "for cluster_idx, palette_idx in cluster_to_palette.items():\n",
    "    new_colors[[cluster_idx]] = zeileis_colors[[palette_idx]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the customised colours so subsequent plots coloured by 'leiden' use them.\n",
    "WTcKOAmpIsth.uns['leiden_colors'] = new_colors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure: UMAP of all populations coloured by annotated cluster, saved as PDF.\n",
    "sc.pl.umap(WTcKOAmpIsth, color='leiden', legend_loc='right margin', legend_fontsize='8.0',\n",
    "           frameon=False, save='Wnt7aPgr_0.5dpc_Leiden_AllPops060424.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the Genotype category names. NOTE(review): the categories were already created\n",
    "# as 'WT' / 'cKO' at concatenation, so this looks like a no-op -- confirm.\n",
    "new_cluster_names = [\n",
    "    'WT',\n",
    "    'cKO'\n",
    "]\n",
    "WTcKOAmpIsth.rename_categories('Genotype', new_cluster_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the zeileis_28 palette again (dot position = palette index) to pick genotype colours.\n",
    "plt.figure(figsize=(8, 2))\n",
    "for i in range(28):\n",
    "    plt.scatter(i, 1, c=sc.pl.palettes.zeileis_28[i], s=200)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): exact duplicate of the Genotype renaming cell above; renaming to the\n",
    "# same names again changes nothing.\n",
    "new_cluster_names = [\n",
    "    'WT',\n",
    "    'cKO'\n",
    "]\n",
    "WTcKOAmpIsth.rename_categories('Genotype', new_cluster_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Array copies of the palette and of the current genotype colours, ready for editing.\n",
    "zeileis_colors = np.array(sc.pl.palettes.zeileis_28)\n",
    "new_colors = np.array(WTcKOAmpIsth.uns['Genotype_colors'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Category 0 (WT) -> palette colour 0, category 1 (cKO) -> palette colour 17.\n",
    "new_colors[[0, 1]] = zeileis_colors[[0, 17]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the customised colours so subsequent plots coloured by 'Genotype' use them.\n",
    "WTcKOAmpIsth.uns['Genotype_colors'] = new_colors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure: UMAP of all populations coloured by genotype, saved as PDF.\n",
    "sc.pl.umap(WTcKOAmpIsth, color='Genotype', legend_loc='right margin', legend_fontsize='8.0',\n",
    "           frameon=False, save='Wnt7aPgr_0.5dpc_Genotype_AllPops060424.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rename the Region categories in order: 'Ampulla' -> 'InfAmp', 'Isthmus' -> 'IsthUTJ'.\n",
    "# Any later filter on obs['Region'] must use these new names.\n",
    "new_cluster_names = [\n",
    "    'InfAmp',\n",
    "    'IsthUTJ'\n",
    "]\n",
    "WTcKOAmpIsth.rename_categories('Region', new_cluster_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the zeileis_28 palette again (dot position = palette index) to pick region colours.\n",
    "plt.figure(figsize=(8, 2))\n",
    "for i in range(28):\n",
    "    plt.scatter(i, 1, c=sc.pl.palettes.zeileis_28[i], s=200)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Array copies of the palette and of the current region colours, ready for editing.\n",
    "zeileis_colors = np.array(sc.pl.palettes.zeileis_28)\n",
    "new_colors = np.array(WTcKOAmpIsth.uns['Region_colors'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Category 0 (InfAmp) -> palette colour 9, category 1 (IsthUTJ) -> palette colour 1.\n",
    "new_colors[[0, 1]] = zeileis_colors[[9, 1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the customised colours so subsequent plots coloured by 'Region' use them.\n",
    "WTcKOAmpIsth.uns['Region_colors'] = new_colors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure: UMAP of all populations coloured by region, saved as PDF.\n",
    "sc.pl.umap(WTcKOAmpIsth, color='Region', legend_loc='right margin', legend_fontsize='8.0',\n",
    "           frameon=False, save='Wnt7aPgr_0.5dpc_Region_AllPops060424.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dot plot of the marker panel per annotated cluster, with clusters ordered by a dendrogram.\n",
    "ax = sc.pl.dotplot(WTcKOAmpIsth, \n",
    "                   marker_genes, groupby='leiden', dendrogram=True, color_map='Blues',\n",
    "                  save='_Wnt7aPgr 0.5dpc_AllPops060424')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dot plot of the marker panel per genotype.\n",
    "# NOTE(review): 'Genoptype' in the saved file name looks like a typo for 'Genotype';\n",
    "# left unchanged here because it alters the output file name.\n",
    "ax = sc.pl.dotplot(WTcKOAmpIsth, \n",
    "                   marker_genes, groupby='Genotype', dendrogram=True, color_map='Blues',\n",
    "                  save='_Wnt7aPgr 0.5dpc Genoptype_AllPops060424')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Differential expression between genotypes across all cells (Wilcoxon rank-sum),\n",
    "# then plot and export the top 1000 ranked gene names per genotype.\n",
    "sc.tl.rank_genes_groups(WTcKOAmpIsth, groupby='Genotype', method='wilcoxon', n_genes=1000)\n",
    "sc.pl.rank_genes_groups(WTcKOAmpIsth, n_genes=1000, sharey=False)\n",
    "ranked_names = WTcKOAmpIsth.uns['rank_genes_groups']['names']\n",
    "df = pd.DataFrame(ranked_names).head(1000)\n",
    "df.to_csv('./WTcKOAmpIsth_Genotype_AllPops060424_top1000.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Subsetting by region and by annotated cell type.\n",
    "# The Region categories are 'InfAmp' / 'IsthUTJ' (they carry no numeric prefix, unlike\n",
    "# the leiden labels), so the region filters must use exactly those names; labels such\n",
    "# as '0 InfAmp' match no cells and would yield empty subsets.\n",
    "def _subset_obs(column, labels):\n",
    "    '''Return an independent copy of the cells whose obs[column] value is in labels.\n",
    "\n",
    "    A copy (not a view) is returned so the per-subset PCA / neighbors / UMAP steps\n",
    "    that follow can write their results without touching the parent object.\n",
    "    '''\n",
    "    return WTcKOAmpIsth[WTcKOAmpIsth.obs[column].isin(labels)].copy()\n",
    "\n",
    "secretory_clusters = ['0 Secretory_IsthUTJ', '1 Secretory_InfAmpIsthUTJ',\n",
    "                      '3 Secretory_InfAmp', '9 Secretory_IsthUTJ_subset']\n",
    "ciliated_clusters = ['2 Ciliated_InfAmp', '5 Ciliated_IsthUTJ']\n",
    "\n",
    "WTcKOAmpIsth_IA = _subset_obs('Region', ['InfAmp'])\n",
    "WTcKOAmpIsth_IU = _subset_obs('Region', ['IsthUTJ'])\n",
    "WTcKOAmpIsth_Immune = _subset_obs('leiden', ['8 Immune1', '11 Immune2'])\n",
    "WTcKOAmpIsth_Muscle = _subset_obs('leiden', ['7 Muscle'])\n",
    "WTcKOAmpIsth_Secretory = _subset_obs('leiden', secretory_clusters)\n",
    "WTcKOAmpIsth_Ciliated = _subset_obs('leiden', ciliated_clusters)\n",
    "WTcKOAmpIsth_Fibroblast = _subset_obs('leiden', ['4 Fibroblast_Pdgfra-', '6 Fibroblast_Pdgfra+'])\n",
    "# Epithelial = secretory + ciliated clusters.\n",
    "WTcKOAmpIsth_Epithelial = _subset_obs('leiden', secretory_clusters + ciliated_clusters)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary of the InfAmp-region subset (check that n_obs is non-zero).\n",
    "WTcKOAmpIsth_IA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary of the IsthUTJ-region subset (check that n_obs is non-zero).\n",
    "WTcKOAmpIsth_IU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary of the immune-cluster subset.\n",
    "WTcKOAmpIsth_Immune"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary of the muscle-cluster subset.\n",
    "WTcKOAmpIsth_Muscle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary of the secretory-cluster subset.\n",
    "WTcKOAmpIsth_Secretory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary of the ciliated-cluster subset.\n",
    "WTcKOAmpIsth_Ciliated"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary of the fibroblast-cluster subset.\n",
    "WTcKOAmpIsth_Fibroblast"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary of the epithelial (secretory + ciliated) subset.\n",
    "WTcKOAmpIsth_Epithelial"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Recompute PCA separately within each cell-type subset.\n",
    "celltype_subsets = (\n",
    "    WTcKOAmpIsth_Immune, WTcKOAmpIsth_Muscle, WTcKOAmpIsth_Secretory,\n",
    "    WTcKOAmpIsth_Ciliated, WTcKOAmpIsth_Fibroblast, WTcKOAmpIsth_Epithelial,\n",
    ")\n",
    "for subset in celltype_subsets:\n",
    "    sc.tl.pca(subset, svd_solver='arpack')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variance explained by the first 37 PCs, one plot per cell-type subset.\n",
    "for subset in (WTcKOAmpIsth_Immune, WTcKOAmpIsth_Muscle, WTcKOAmpIsth_Secretory,\n",
    "               WTcKOAmpIsth_Ciliated, WTcKOAmpIsth_Fibroblast, WTcKOAmpIsth_Epithelial):\n",
    "    sc.pl.pca_variance_ratio(subset, n_pcs=37, log=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Neighbour graph per subset, with the same settings as the full dataset (k=10, 37 PCs).\n",
    "for subset in (WTcKOAmpIsth_Immune, WTcKOAmpIsth_Muscle, WTcKOAmpIsth_Secretory,\n",
    "               WTcKOAmpIsth_Ciliated, WTcKOAmpIsth_Fibroblast, WTcKOAmpIsth_Epithelial):\n",
    "    sc.pp.neighbors(subset, n_neighbors=10, n_pcs=37)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# UMAP embedding per subset.\n",
    "for subset in (WTcKOAmpIsth_Immune, WTcKOAmpIsth_Muscle, WTcKOAmpIsth_Secretory,\n",
    "               WTcKOAmpIsth_Ciliated, WTcKOAmpIsth_Fibroblast, WTcKOAmpIsth_Epithelial):\n",
    "    sc.tl.umap(subset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Genes plotted on every per-subset UMAP. Despite its name this is a plain list; the\n",
    "# name is kept because the plotting cell below refers to it. 'Myh11' is listed once\n",
    "# only -- a repeated entry would draw the same panel twice for every subset.\n",
    "gene_dict = [\n",
    "    'Epcam', 'Ovgp1', 'Foxj1', 'Myh11', 'Ptprc', 'Pecam1',\n",
    "    'Pdgfra', 'Twist2', 'Igf1', 'Dcn', 'Serpina1e', 'Myb', 'Pax8', 'Pax2', 'Cxcl17', 'Id2',\n",
    "    'Pdxk', 'Ccdc153', 'Il22', 'Il22ra1', 'Il22ra2', 'Stat3',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-subset UMAPs coloured by each gene in gene_dict.\n",
    "for subset in (WTcKOAmpIsth_Immune, WTcKOAmpIsth_Muscle, WTcKOAmpIsth_Secretory,\n",
    "               WTcKOAmpIsth_Ciliated, WTcKOAmpIsth_Fibroblast, WTcKOAmpIsth_Epithelial):\n",
    "    sc.pl.umap(subset, color=gene_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure: UMAP of the immune subset coloured by annotated cluster, saved as PDF.\n",
    "sc.pl.umap(WTcKOAmpIsth_Immune, color='leiden', legend_loc='right margin', legend_fontsize='8.0',\n",
    "           frameon=False, save='Wnt7aPgr_0.5dpc_ImmuneOnly_Leiden060724.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# WT vs cKO: immune genes of interest within the immune-cell subset.\n",
    "immune_panel = ['Il1b', 'Il22', 'Il22ra2', 'Il10', 'Il15', 'Il17a', 'Il18', 'Ifng',\n",
    "                'Tlr2', 'Tlr3', 'Tlr4', 'Tlr5', 'Tlr6', 'Tlr7', 'Tlr8', 'Tlr9', 'Ccl24']\n",
    "sc.pl.dotplot(\n",
    "    WTcKOAmpIsth_Immune, immune_panel, groupby='Genotype', color_map='Blues',\n",
    "    save='_WTcKOAmpIsth_Immune_Genes of interest_immune genes_WTvsCKO_20240606.pdf',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# WT vs cKO: same immune panel without 'Il1b', within the immune-cell subset.\n",
    "immune_panel_no_il1b = ['Il22', 'Il22ra2', 'Il10', 'Il15', 'Il17a', 'Il18', 'Ifng',\n",
    "                        'Tlr2', 'Tlr3', 'Tlr4', 'Tlr5', 'Tlr6', 'Tlr7', 'Tlr8', 'Tlr9', 'Ccl24']\n",
    "sc.pl.dotplot(\n",
    "    WTcKOAmpIsth_Immune, immune_panel_no_il1b, groupby='Genotype', color_map='Blues',\n",
    "    save='_WTcKOAmpIsth_Immune_Genes of interest_immune genes2_WTvsCKO_20240606.pdf',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# WT vs cKO: immune genes of interest within the epithelial subset.\n",
    "immune_panel = ['Il1b', 'Il22', 'Il22ra2', 'Il10', 'Il15', 'Il17a', 'Il18', 'Ifng',\n",
    "                'Tlr2', 'Tlr3', 'Tlr4', 'Tlr5', 'Tlr6', 'Tlr7', 'Tlr8', 'Tlr9', 'Ccl24']\n",
    "sc.pl.dotplot(\n",
    "    WTcKOAmpIsth_Epithelial, immune_panel, groupby='Genotype', color_map='Blues',\n",
    "    save='_WTcKOAmpIsth_Epithelial_Genes of interest_immune genes2_WTvsCKO_20240606.pdf',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# WT vs cKO: immune genes of interest within the secretory subset.\n",
    "immune_panel = ['Il1b', 'Il22', 'Il22ra2', 'Il10', 'Il15', 'Il17a', 'Il18', 'Ifng',\n",
    "                'Tlr2', 'Tlr3', 'Tlr4', 'Tlr5', 'Tlr6', 'Tlr7', 'Tlr8', 'Tlr9', 'Ccl24']\n",
    "sc.pl.dotplot(\n",
    "    WTcKOAmpIsth_Secretory, immune_panel, groupby='Genotype', color_map='Blues',\n",
    "    save='_WTcKOAmpIsth_Secretory_Genes of interest_immune genes2_WTvsCKO_20240606.pdf',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# WT vs cKO: immune genes of interest within the fibroblast subset.\n",
    "immune_panel = ['Il1b', 'Il22', 'Il22ra2', 'Il10', 'Il15', 'Il17a', 'Il18', 'Ifng',\n",
    "                'Tlr2', 'Tlr3', 'Tlr4', 'Tlr5', 'Tlr6', 'Tlr7', 'Tlr8', 'Tlr9', 'Ccl24']\n",
    "sc.pl.dotplot(\n",
    "    WTcKOAmpIsth_Fibroblast, immune_panel, groupby='Genotype', color_map='Blues',\n",
    "    save='_WTcKOAmpIsth_Fibroblast_Genes of interest_immune genes2_WTvsCKO_20240606.pdf',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# WT vs cKO: immune genes of interest within the ciliated subset.\n",
    "immune_panel = ['Il1b', 'Il22', 'Il22ra2', 'Il10', 'Il15', 'Il17a', 'Il18', 'Ifng',\n",
    "                'Tlr2', 'Tlr3', 'Tlr4', 'Tlr5', 'Tlr6', 'Tlr7', 'Tlr8', 'Tlr9', 'Ccl24']\n",
    "sc.pl.dotplot(\n",
    "    WTcKOAmpIsth_Ciliated, immune_panel, groupby='Genotype', color_map='Blues',\n",
    "    save='_WTcKOAmpIsth_Ciliated_Genes of interest_immune genes2_WTvsCKO_20240606.pdf',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stacked violins of selected cytokine / TLR genes in epithelial cells, grouped by cluster.\n",
    "markers = ['Il1b', 'Il22', 'Il10', 'Il18', 'Ifng', 'Tlr2', 'Tlr4']\n",
    "sc.pl.stacked_violin(WTcKOAmpIsth_Epithelial, markers, groupby='leiden', dendrogram=False, color_map='Blues')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same gene panel in epithelial cells, grouped by genotype (with a dendrogram over the groups).\n",
    "markers = ['Il1b', 'Il22', 'Il10', 'Il18', 'Ifng', 'Tlr2', 'Tlr4']\n",
    "sc.pl.stacked_violin(WTcKOAmpIsth_Epithelial, markers, groupby='Genotype', dendrogram=True, color_map='Blues')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cytokine / TLR panel compared between genotypes: one saved dot plot per cell-type subset.\n",
    "cytokine_panel = ['Il1b', 'Il10', 'Il15', 'Il17a', 'Il18', 'Il22', 'Ifng', 'Tlr2', 'Tlr4']\n",
    "subsets_by_label = {\n",
    "    'Immune': WTcKOAmpIsth_Immune,\n",
    "    'Epithelial': WTcKOAmpIsth_Epithelial,\n",
    "    'Secretory': WTcKOAmpIsth_Secretory,\n",
    "    'Ciliated': WTcKOAmpIsth_Ciliated,\n",
    "    'Fibroblast': WTcKOAmpIsth_Fibroblast,\n",
    "}\n",
    "for label, subset in subsets_by_label.items():\n",
    "    sc.pl.dotplot(\n",
    "        subset, cytokine_panel, groupby='Genotype', color_map='Blues',\n",
    "        save=f'_WTcKOAmpIsth_{label}_IL1b,10,15,17a,18,22,Ifng,Tlr2,Tlr4_WTvsCKO_20240606.pdf',\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Signalling-gene panel compared between genotypes: one saved dot plot per subset.\n",
    "# File names are listed explicitly because the Ciliated and Fibroblast names do not\n",
    "# include the '_WTvsCKO' part that the other three have.\n",
    "signalling_panel = ['Il10ra', 'Il10rb', 'Stat1', 'Stat3', 'Map2k1', 'Mapk8', 'Mapk14', 'Jak1', 'Tyk2']\n",
    "signalling_plots = [\n",
    "    (WTcKOAmpIsth_Immune, '_WTcKOAmpIsth_Immune_IL22Signal_WTvsCKO_20240606.pdf'),\n",
    "    (WTcKOAmpIsth_Epithelial, '_WTcKOAmpIsth_Epithelial_IL22Signal_WTvsCKO_20240606.pdf'),\n",
    "    (WTcKOAmpIsth_Secretory, '_WTcKOAmpIsth_Secretory_IL22Signal_WTvsCKO_20240606.pdf'),\n",
    "    (WTcKOAmpIsth_Ciliated, '_WTcKOAmpIsth_Ciliated_IL22Signal_20240606.pdf'),\n",
    "    (WTcKOAmpIsth_Fibroblast, '_WTcKOAmpIsth_Fibroblast_IL22Signal_20240606.pdf'),\n",
    "]\n",
    "for subset, file_suffix in signalling_plots:\n",
    "    sc.pl.dotplot(subset, signalling_panel, groupby='Genotype', color_map='Blues', save=file_suffix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rank genes between genotypes (Wilcoxon rank-sum) within each subset, default gene count.\n",
    "for subset in (WTcKOAmpIsth_Immune, WTcKOAmpIsth_Epithelial, WTcKOAmpIsth_Secretory,\n",
    "               WTcKOAmpIsth_Ciliated, WTcKOAmpIsth_Fibroblast):\n",
    "    sc.tl.rank_genes_groups(subset, groupby='Genotype', method='wilcoxon')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute a hierarchical clustering of the annotated leiden clusters.\n",
    "sc.tl.dendrogram(WTcKOAmpIsth, 'leiden')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the cluster dendrogram.\n",
    "ax = sc.pl.dendrogram(WTcKOAmpIsth, 'leiden')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Immune subset: rank genes between genotypes (Wilcoxon), plot, and export the top 1000 names.\n",
    "sc.tl.rank_genes_groups(WTcKOAmpIsth_Immune, groupby='Genotype', method='wilcoxon', n_genes=1000)\n",
    "sc.pl.rank_genes_groups(WTcKOAmpIsth_Immune, n_genes=1000, sharey=False)\n",
    "immune_ranked_names = WTcKOAmpIsth_Immune.uns['rank_genes_groups']['names']\n",
    "immune_top_genes = pd.DataFrame(immune_ranked_names).head(1000)\n",
    "immune_top_genes.to_csv('./WTcKOAmpIsth_Immune_Genotype_top1000_060624.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Epithelial subset: rank genes between genotypes (Wilcoxon), plot, and export the top 1000 names.\n",
    "sc.tl.rank_genes_groups(WTcKOAmpIsth_Epithelial, groupby='Genotype', method='wilcoxon', n_genes=1000)\n",
    "sc.pl.rank_genes_groups(WTcKOAmpIsth_Epithelial, n_genes=1000, sharey=False)\n",
    "epithelial_ranked_names = WTcKOAmpIsth_Epithelial.uns['rank_genes_groups']['names']\n",
    "epithelial_top_genes = pd.DataFrame(epithelial_ranked_names).head(1000)\n",
    "epithelial_top_genes.to_csv('./WTcKOAmpIsth_Epithelial_Genotype_top1000_060624.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fibroblast subset: rank genes between genotypes (Wilcoxon), plot, and export the top 1000 names.\n",
    "sc.tl.rank_genes_groups(WTcKOAmpIsth_Fibroblast, groupby='Genotype', method='wilcoxon', n_genes=1000)\n",
    "sc.pl.rank_genes_groups(WTcKOAmpIsth_Fibroblast, n_genes=1000, sharey=False)\n",
    "fibroblast_ranked_names = WTcKOAmpIsth_Fibroblast.uns['rank_genes_groups']['names']\n",
    "fibroblast_top_genes = pd.DataFrame(fibroblast_ranked_names).head(1000)\n",
    "fibroblast_top_genes.to_csv('./WTcKOAmpIsth_Fibroblast_Genotype_top1000_060624.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Secretory subset: rank genes between genotypes (Wilcoxon), plot, and export the top 1000 names.\n",
    "sc.tl.rank_genes_groups(WTcKOAmpIsth_Secretory, groupby='Genotype', method='wilcoxon', n_genes=1000)\n",
    "sc.pl.rank_genes_groups(WTcKOAmpIsth_Secretory, n_genes=1000, sharey=False)\n",
    "secretory_ranked_names = WTcKOAmpIsth_Secretory.uns['rank_genes_groups']['names']\n",
    "secretory_top_genes = pd.DataFrame(secretory_ranked_names).head(1000)\n",
    "secretory_top_genes.to_csv('./WTcKOAmpIsth_Secretory_Genotype_top1000_060624.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ciliated subset: rank genes between genotypes (Wilcoxon), plot, and export the top 1000 names.\n",
    "sc.tl.rank_genes_groups(WTcKOAmpIsth_Ciliated, groupby='Genotype', method='wilcoxon', n_genes=1000)\n",
    "sc.pl.rank_genes_groups(WTcKOAmpIsth_Ciliated, n_genes=1000, sharey=False)\n",
    "ciliated_ranked_names = WTcKOAmpIsth_Ciliated.uns['rank_genes_groups']['names']\n",
    "ciliated_top_genes = pd.DataFrame(ciliated_ranked_names).head(1000)\n",
    "ciliated_top_genes.to_csv('./WTcKOAmpIsth_Ciliated_Genotype_top1000_060624.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Muscle subset: rank genes between genotypes (Wilcoxon), plot, and export the top 1000 names.\n",
    "sc.tl.rank_genes_groups(WTcKOAmpIsth_Muscle, groupby='Genotype', method='wilcoxon', n_genes=1000)\n",
    "sc.pl.rank_genes_groups(WTcKOAmpIsth_Muscle, n_genes=1000, sharey=False)\n",
    "muscle_ranked_names = WTcKOAmpIsth_Muscle.uns['rank_genes_groups']['names']\n",
    "muscle_top_genes = pd.DataFrame(muscle_ranked_names).head(1000)\n",
    "muscle_top_genes.to_csv('./WTcKOAmpIsth_Muscle_Genotype_top1000_060624.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "winuthayanon-lab-191",
   "language": "python",
   "name": "winuthayanon-lab-191"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
